This is the visualisation script for the greenhouse gas data product, which was used in the QA/QC process and review of the data package.

1. It takes the data file on EDI and combines it with the L1 file, which has already been QAQCed.
2. Lists the flag frequency to check whether there are any NAs or any observations assigned the wrong flag.
3. Creates plots to visualise all variables and sites.
4. Writes the new combined data to a new csv.
This section checks to make sure each observation has a data flag. It also checks that the frequency of each flag matches what we expect to see.
# Make sure there are no NAs in the Flag columns
Flags <- select(current_df, DateTime, starts_with("Flag"))

# Collect the rows that are NOT complete, i.e. rows with an NA in DateTime or
# in any Flag column. An empty RowsNA means nothing is missing.
RowsNA <- Flags[which(!complete.cases(Flags)), ]
# Check the flag columns: keep only the columns whose name starts with "Flag"
Flags <- current_df |>
  select(starts_with("Flag"))

# Print a frequency table (including an NA count) for every flag column.
# Looping over the column names instead of 1:ncol(Flags) means the loop simply
# does nothing when no Flag columns were selected, rather than iterating over
# c(1, 0) and failing on a non-existent column.
for (flag_col in names(Flags)) {
  # print(flag_col)  # uncomment to label each table with its column name
  # [[ ]] always extracts the column as a plain vector, whether Flags is a
  # base data.frame or a tibble.
  print(table(Flags[[flag_col]], useNA = "always"))
}
##
## 0 1 <NA>
## 5322 2630 0
##
## 0 1 2 3 4 6 7 <NA>
## 5509 624 398 387 793 53 15 173
##
## 0 1 2 3 4 6 7 <NA>
## 5830 741 329 159 470 159 18 246
We want to make sure that our maintenance log actually worked and removed or changed the values it was supposed to.
## Rows: 9 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Reservoir, DataStream, TIMESTAMP_start, TIMESTAMP_end, start_parame...
## dbl (5): Site, Depth, Reps, vial_number, flag
## lgl (1): update_value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 6 × 13
## Reservoir Site Depth Reps DataStream TIMESTAMP_start TIMESTAMP_end
## <chr> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 FCR 50 3.8 NA GHG 2024-04-15 09:53:00 EDT 2024-04-15 10:…
## 2 BVR 50 3 NA GHG 2024-05-06 10:32:00 EDT 2024-05-06 11:…
## 3 FCR 50 5 NA GHG 2024-05-31 11:42:00 EDT 2024-05-31 12:…
## 4 FCR 1.1 0.1 NA GHG 2024-08-26 10:56:00 EDT 2024-08-26 11:…
## 5 FCR 200 0.1 NA GHG 2024-09-02 13:15:00 EDT 2024-09-02 14:…
## 6 FCR 50 8 NA GHG 2024-12-03 10:52:00 EDT 2024-12-03 10:…
## # ℹ 6 more variables: start_parameter <chr>, end_parameter <chr>,
## # vial_number <dbl>, flag <dbl>, update_value <lgl>, notes <chr>
| Reservoir | Site | Depth | Reps | DataStream | TIMESTAMP_start | TIMESTAMP_end | start_parameter | end_parameter | vial_number | flag | update_value | notes |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| FCR | 50.0 | 3.8 | NA | GHG | 2024-04-15 09:53:00 EDT | 2024-04-15 10:53:00 EDT | CO2_umolL | NA | 328 | 1 | NA | Moisture interfered with CO2 peak and not usable |
| BVR | 50.0 | 3.0 | NA | GHG | 2024-05-06 10:32:00 EDT | 2024-05-06 11:32:00 EDT | CO2_umolL | NA | 271 | 1 | NA | Moisture interfered with CO2 peak |
| FCR | 50.0 | 5.0 | NA | GHG | 2024-05-31 11:42:00 EDT | 2024-05-31 12:42:00 EDT | CO2_umolL | NA | 21 | 1 | NA | Moisture interfered with CO2 peak |
| FCR | 1.1 | 0.1 | NA | GHG | 2024-08-26 10:56:00 EDT | 2024-08-26 11:56:00 EDT | CO2_umolL | NA | 97 | 1 | NA | Moisture interfered with CO2 peak |
| FCR | 200.0 | 0.1 | NA | GHG | 2024-09-02 13:15:00 EDT | 2024-09-02 14:15:00 EDT | CO2_umolL | NA | 345 | 1 | NA | Moisture interfered with CO2 peak |
| FCR | 50.0 | 8.0 | NA | GHG | 2024-12-03 10:52:00 EDT | 2024-12-03 10:54:00 EDT | CO2_umolL | NA | 215 | 1 | NA | Very end of CO2 peak was cut off |
Look at the first few rows of the data frame and check that the observations after the TIMESTAMP_start are flagged
## DateTime CO2_umolL Flag_CO2_umolL
## 7947 2024-12-03 10:53:00 NA 1
## 7948 2024-12-03 10:53:00 67.0538 4
## DateTime CO2_umolL Flag_CO2_umolL
## 7947 2024-12-03 10:53:00 NA 1
## 7948 2024-12-03 10:53:00 67.0538 4
# FCR, Site 50: CH4 through time, one colour per depth.
# Only rows with Depth_m < 100 are kept; Depth_m becomes a factor so that the
# colour scale is discrete.
current_df |>
  filter(Reservoir == "FCR" & Site == 50 & Depth_m < 100) |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(mapping = aes(DateTime, CH4_umolL, colour = Depth_m)) +
  geom_point() +
  ggtitle("FCR CH4")
## Warning: Removed 361 rows containing missing values or values outside the scale range
## (`geom_point()`).
# FCR, Site 50: CO2 through time, one colour per depth.
# Only rows with Depth_m < 100 are kept; Depth_m becomes a factor so that the
# colour scale is discrete.
current_df |>
  filter(Reservoir == "FCR" & Site == 50 & Depth_m < 100) |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(mapping = aes(DateTime, CO2_umolL, colour = Depth_m)) +
  geom_point() +
  ggtitle("FCR CO2")
## Warning: Removed 446 rows containing missing values or values outside the scale range
## (`geom_point()`).
# BVR, Site 50: CH4 through time, one colour per depth.
# Only rows with Depth_m < 100 are kept; Depth_m becomes a factor so that the
# colour scale is discrete.
current_df |>
  filter(Reservoir == "BVR" & Site == 50 & Depth_m < 100) |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(mapping = aes(DateTime, CH4_umolL, colour = Depth_m)) +
  geom_point() +
  ggtitle("BVR CH4")
## Warning: Removed 168 rows containing missing values or values outside the scale range
## (`geom_point()`).
# BVR, Site 50: CO2 through time, one colour per depth.
# Only rows with Depth_m < 100 are kept; Depth_m becomes a factor so that the
# colour scale is discrete.
current_df |>
  filter(Reservoir == "BVR" & Site == 50 & Depth_m < 100) |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(mapping = aes(DateTime, CO2_umolL, colour = Depth_m)) +
  geom_point() +
  ggtitle("BVR CO2")
## Warning: Removed 195 rows containing missing values or values outside the scale range
## (`geom_point()`).
# FCR, all sites except 50 - CH4 through time, coloured by site
current_df |>
  filter(Reservoir == "FCR") |>
  filter(Site != 50) |>
  ggplot(mapping = aes(DateTime, CH4_umolL, colour = as.factor(Site))) +
  geom_point() +
  ggtitle("FCR CH4 other sites")
## Warning: Removed 80 rows containing missing values or values outside the scale range
## (`geom_point()`).
# FCR, all sites except 50 - CO2 through time, coloured by site
current_df |>
  filter(Reservoir == "FCR") |>
  filter(Site != 50) |>
  ggplot(mapping = aes(DateTime, CO2_umolL, colour = as.factor(Site))) +
  geom_point() +
  ggtitle("FCR CO2 other sites")
## Warning: Removed 86 rows containing missing values or values outside the scale range
## (`geom_point()`).
# BVR, all sites except 50 - CH4 through time, coloured by site
current_df |>
  filter(Reservoir == "BVR") |>
  filter(Site != 50) |>
  ggplot(mapping = aes(DateTime, CH4_umolL, colour = as.factor(Site))) +
  geom_point() +
  ggtitle("BVR CH4 other sites")
## Warning: Removed 15 rows containing missing values or values outside the scale range
## (`geom_point()`).
# BVR, all sites except 50 - CO2 through time, coloured by site
current_df |>
  filter(Reservoir == "BVR") |>
  filter(Site != 50) |>
  ggplot(mapping = aes(DateTime, CO2_umolL, colour = as.factor(Site))) +
  geom_point() +
  ggtitle("BVR CO2 other sites")
## Warning: Removed 14 rows containing missing values or values outside the scale range
## (`geom_point()`).
Look at this year only:
# CH4 plots current year
# FCR, sites other than 50, observations from 2024-01-01 onward with
# Depth_m < 100; points are coloured by site.
a <- current_df |>
  filter(Reservoir == "FCR" & Site != 50) |>
  filter(Depth_m < 100 & DateTime >= "2024-01-01") |>
  mutate(Site = as.factor(Site)) |>
  ggplot(mapping = aes(DateTime, CH4_umolL, colour = Site)) +
  geom_point() +
  ggtitle("FCR CH4")
# Render the ggplot object as an interactive plotly figure
a |> ggplotly()
# FCR Site 50 CH4, observations from 2024-01-01 onward with Depth_m < 100;
# points are coloured by depth.
b <- current_df |>
  filter(Reservoir == "FCR" & Site == 50) |>
  filter(Depth_m < 100 & DateTime >= "2024-01-01") |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(mapping = aes(DateTime, CH4_umolL, colour = Depth_m)) +
  geom_point() +
  ggtitle("FCR Site 50, CH4")
# Render the ggplot object as an interactive plotly figure
b |> ggplotly()
# BVR CH4 at sites other than 50, observations from 2024-01-01 onward with
# Depth_m < 100; points are coloured by site.
c <- current_df |>
  filter(Reservoir == "BVR" & Site != 50) |>
  filter(Depth_m < 100 & DateTime >= "2024-01-01") |>
  mutate(Site = as.factor(Site)) |>
  ggplot(mapping = aes(DateTime, CH4_umolL, colour = Site)) +
  geom_point() +
  ggtitle("BVR CH4")
# Render the ggplot object as an interactive plotly figure
c |> ggplotly()
# BVR Site 50 CH4, observations from 2024-01-01 onward with Depth_m < 100;
# points are coloured by depth.
d <- current_df |>
  filter(Reservoir == "BVR" & Site == 50) |>
  filter(Depth_m < 100 & DateTime >= "2024-01-01") |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(mapping = aes(DateTime, CH4_umolL, colour = Depth_m)) +
  geom_point() +
  ggtitle("BVR Site 50, CH4")
# Render the ggplot object as an interactive plotly figure
d |> ggplotly()
# CO2 plots current year
# FCR, sites other than 50, observations from 2024-01-01 onward with
# Depth_m < 100; points are coloured by site.
e <- current_df |>
  filter(Reservoir == "FCR" & Site != 50) |>
  filter(Depth_m < 100 & DateTime >= "2024-01-01") |>
  mutate(Site = as.factor(Site)) |>
  ggplot(mapping = aes(DateTime, CO2_umolL, colour = Site)) +
  geom_point() +
  ggtitle("FCR CO2")
# Render the ggplot object as an interactive plotly figure
e |> ggplotly()
# FCR Site 50 CO2, observations from 2024-01-01 onward with Depth_m < 100;
# points are coloured by depth.
f <- current_df |>
  filter(Reservoir == "FCR" & Site == 50) |>
  filter(Depth_m < 100 & DateTime >= "2024-01-01") |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(mapping = aes(DateTime, CO2_umolL, colour = Depth_m)) +
  geom_point() +
  ggtitle("FCR Site 50, CO2")
# Render the ggplot object as an interactive plotly figure
f |> ggplotly()
# BVR CO2 at sites other than 50, observations from 2024-01-01 onward with
# Depth_m < 100; points are coloured by site.
g <- current_df |>
  filter(Reservoir == "BVR" & Site != 50) |>
  filter(Depth_m < 100 & DateTime >= "2024-01-01") |>
  mutate(Site = as.factor(Site)) |>
  ggplot(mapping = aes(DateTime, CO2_umolL, colour = Site)) +
  geom_point() +
  ggtitle("BVR CO2")
# Render the ggplot object as an interactive plotly figure
g |> ggplotly()
# BVR Site 50 CO2, observations from 2024-01-01 onward with Depth_m < 100;
# points are coloured by depth.
h <- current_df |>
  filter(Reservoir == "BVR" & Site == 50) |>
  filter(Depth_m < 100 & DateTime >= "2024-01-01") |>
  mutate(Depth_m = as_factor(Depth_m)) |>
  ggplot(mapping = aes(DateTime, CO2_umolL, colour = Depth_m)) +
  geom_point() +
  ggtitle("BVR Site 50, CO2")
# Render the ggplot object as an interactive plotly figure
h |> ggplotly()
# Convert DateTime to text so it is written to the csv in a fixed layout.
# format() already returns a character vector, so no as.character() wrapper is
# needed. The format string is given explicitly because the default layout of
# format() for date-times depends on the data (date only when every time is
# midnight).
# NOTE(review): assumes DateTime is a POSIXct date-time column - confirm.
current_df$DateTime <- format(
  current_df$DateTime,
  format = "%Y-%m-%d %H:%M:%S"
)
# Need to decide on a naming convention for this file
# row.names = FALSE (not F, which is an ordinary, reassignable variable) keeps
# the row index out of the file.
write.csv(current_df, "ghg_2015_2024.csv", row.names = FALSE)